/***************************************************************************
 *
 * Copyright (C) 2001 International Business Machines
 * All rights reserved.
 *
 * This file is part of the GPFS mmfslinux kernel module.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions 
 * are met:
 *
 *  1. Redistributions of source code must retain the above copyright notice, 
 *     this list of conditions and the following disclaimer. 
 *  2. Redistributions in binary form must reproduce the above copyright 
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution. 
 *  3. The name of the author may not be used to endorse or promote products 
 *     derived from this software without specific prior written
 *     permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *************************************************************************** */
/*
 * Platform specific synchronization/atomic operations for Linux
 *
 * Note that these should not be directly invoked; instead use the
 * ATOMIC_XXX and ATOMIC_XXXLP macros from <cxiAtomic.h>.
 *
 * Definitions for baseline atomic operations (long/pointer variants)
 *     comp_and_swap(lp)
 *
 * Definitions for atomic operations
 *     fetch_and_add(lp)
 *     fetch_and_and(lp)
 *     fetch_and_or(lp)
 *     compare_and_swap(lp)
 *     _check_lock
 *
 * $Id: cxiAtomic-plat.h,v 1.7 2001/10/11 14:05:21 gjertsen Exp $
 *
 * $Log: cxiAtomic-plat.h,v $
 * Revision 1.7  2001/10/11 14:05:21  gjertsen
 * Provide support for 64-bit mutex lockword for IA64 (LOCKWORD64).
 *
 * Revision 1.6  2001/04/06 13:55:16  dixonbp
 * Fix gcc compile error in comp_and_swap.  Cannot restrict the accumulator
 * when it is in the output list.
 *
 * Revision 1.5  2001/01/08 20:49:02  gjertsen
 * Minor code cleanup. No functional changes.
 *
 * Revision 1.4  2000/12/20 22:20:35  gjertsen
 * Rework IA64 compare and swap operations
 *
 * Revision 1.3  2000/12/15 13:57:18  gjertsen
 * Clean up documentation.
 *
 * Revision 1.2  2000/12/14 20:56:42  wyllie
 * Declare comp_and_swap asm code to modify memory, so that no tricks are
 * needed in passing the address of the word to be modified.
 *
 * Revision 1.1  2000/10/31 15:42:38  gjertsen
 * Account for atomic operations in cxi layer.
 *
 *
 */

#ifndef _h_cxiAtomic_plat
#define _h_cxiAtomic_plat

/* NOTE: need to further split this file into architecture specific headers. */

#include <cxiTypes.h>

/* Baseline atomic operation for i386: comp_and_swap */

#ifdef GPFS_ARCH_I386

/* Use lock by default; necessary for SMP kernel although this
   adds some overhead when running on the UP kernel. */
#define SMPLOCK "lock;"

/* Compare the contents of word_addr with the contents of old_val_addr.
   If the values are equal, store new_val in word_addr and return 1.
   Otherwise, set old_val_addr to the current value of word_addr and
   return 0.

   Operand notes for the asm statement:
     %0  *word_addr     "+m"  - cmpxchg both reads and (on a match) writes
                                the word, so it is declared read-write.
     %1  *old_val_addr  "+a"  - the accumulator carries the expected value
                                in and the value observed in memory out.
     %2  result         "=&q" - any byte-addressable register; it is not
                                pinned to ebx.
                                NOTE(review): ebx is the PIC register on
                                i386 and older gcc refuses to allocate it
                                in PIC builds - confirm against the
                                compilers this is built with.
     %3  new_val        "r"
   The "memory" clobber keeps the compiler from caching memory contents
   across the operation. */
static inline int
comp_and_swap(volatile int *word_addr, int *old_val_addr, int new_val)
{
   unsigned char result;

   __asm__  __volatile__(
	    SMPLOCK "cmpxchg %3,%0\n\t"
	    "setz %2"                 /* ZF is set iff the swap happened */

	    :"+m" (*word_addr),
	     "+a" (*old_val_addr),
	     "=&q" (result)

	    :"r" (new_val)

	    :"cc",
	     "memory");
   return result;
}

#endif /* GPFS_ARCH_I386 */


/* Baseline atomic operations for ia64: comp_and_swap and comp_and_swaplp */

/* Found the HP IA64 ISA guide very useful here:
   http://devresource.hp.com/devresource/Docs/Refs/IA64ISA/ */

#ifdef GPFS_ARCH_IA64

/* Fence operation apparently needed for A/B step processors with cmpxchg */
#define MEM_FENCE "mf;"
#define MASK_LOWER32 0x00000000FFFFFFFFULL

/* Compare and swap a 4-byte word.

   Compare the contents of word_addr with the contents of old_val_addr.
   If the values are equal, store new_val in word_addr and return 1.
   Otherwise, set old_val_addr to the current value of word_addr and
   return 0.

   The expected value is widened to 64 bits with the upper half cleared
   before it is loaded into ar.ccv and before it is compared with the
   value handed back by cmpxchg4.
   NOTE(review): this presumes cmpxchg4 returns the loaded word
   zero-extended - confirm against the IA64 ISA guide. */
static inline int
comp_and_swap(volatile int *word_addr, int *old_val_addr, int new_val)
{
  UInt64 old_val = ((UInt64)*old_val_addr) & MASK_LOWER32;
  UInt64 ret_val;

  /* Ensure mov-to-AR[CCV] in separate instruction group/bundle from cmpxchg
     to handle RAW dependency.  The template is kept on a single line:
     a string literal containing a raw newline is not valid C and is
     rejected by current compilers. */
  __asm__ __volatile__ ("mov ar.ccv=%0;;"
                        :
                        : "rO"(old_val));

  /* Use fence before cmpxchg for now. Use acquire consistency semantics
     with cmpxchg (memory write visible to all subsequent data memory
     accesses) */
  __asm__ __volatile__ (MEM_FENCE
                        "cmpxchg4.acq %0=[%1],%2,ar.ccv"

                        : "=r"(ret_val)

                        : "r"(word_addr),
                          "r"(new_val)

                        : "memory");

  if (ret_val == old_val)
    return 1;

  /* Mismatch: report the value that was actually found. */
  *old_val_addr = (int)ret_val;
  return 0;
}

/* Compare and swap a natural 8-byte word.

   Compare the contents of word_addr with the contents of old_val_addr.
   If the values are equal, store new_val in word_addr and return 1.
   Otherwise, set old_val_addr to the current value of word_addr and
   return 0. */
static inline int
comp_and_swaplp(volatile long *word_addr, long *old_val_addr, long new_val)
{
  /* Snapshot the expected value once so the value loaded into ar.ccv and
     the value compared against the result are guaranteed to be the same. */
  long expected = *old_val_addr;
  long ret;

  /* Ensure mov-to-AR[CCV] in separate instruction group/bundle from cmpxchg
     to handle RAW dependency.  The template is kept on a single line:
     a string literal containing a raw newline is not valid C and is
     rejected by current compilers. */
  __asm__ __volatile__ ("mov ar.ccv=%0;;"
                        :
                        : "rO"(expected));

  /* Use fence before cmpxchg for now. Use acquire consistency semantics
     with cmpxchg (memory write visible to all subsequent data memory
     accesses) */
  __asm__ __volatile__ (MEM_FENCE
                        "cmpxchg8.acq %0=[%1],%2,ar.ccv"

                        : "=r"(ret)

                        : "r"(word_addr),
                          "r"(new_val)

                        : "memory");

  if (ret == expected)
    return 1;

  /* Mismatch: report the value that was actually found. */
  *old_val_addr = ret;
  return 0;
}

#endif /* GPFS_ARCH_IA64 */


/* fetch_and_XXX and fetch_and_XXXlp operations */

/* The standard trace statements cannot be used from inlined functions.
   To debug the atomic operations, replace the #undef below with a
   #define of USE_LOCK_TRACE (this should rarely be necessary).

   With tracing off, LOCK_TRACE takes exactly six arguments (a format
   string plus five values) and expands to nothing, so none of the
   arguments are evaluated.  With tracing on, it maps to printk when
   _KERNEL is defined and to printf otherwise. */
#undef USE_LOCK_TRACE

#if !defined(USE_LOCK_TRACE)
#define LOCK_TRACE(X1,X2,X3,X4,X5,X6)
#elif defined(_KERNEL)
#define LOCK_TRACE printk
#else
#define LOCK_TRACE printf
#endif /* USE_LOCK_TRACE */

/* Add i to the word at wd using a compare-and-swap retry loop and return
   the value the word held immediately before the successful update.

   comp_and_swap refreshes oldVal with the current contents of the word
   whenever the swap fails, so each retry recomputes newVal from the
   latest observed value.

   The trace arguments are cast to match the format string (%p for the
   address, %X for the 32-bit values) so that enabling USE_LOCK_TRACE
   does not pass mismatched varargs. */
static inline int
fetch_and_add(atomic_p wd, int i)
{
  int ret, oldVal, newVal;
  oldVal = *wd;

  do
  {
    newVal = oldVal + i;
    ret = comp_and_swap((volatile int *)wd, &oldVal, newVal);

    LOCK_TRACE(
           "fetch_and_add: wd %p *wd 0x%X old 0x%X new 0x%X ret %d\n",
           (void *)wd, (unsigned int)*wd, (unsigned int)oldVal,
           (unsigned int)newVal, ret);

  } while (ret == 0);

  return oldVal;
}

#ifdef __64BIT__
/* Long variant of fetch_and_add: add i to the long word at wd with a
   compare-and-swap retry loop and return the value the word held
   immediately before the successful update. */
static inline long
fetch_and_addlp(atomic_l wd, long i)
{
  long expected = *wd;
  long desired;
  int  swapped;

  for (;;)
  {
    desired = expected + i;

    /* On failure comp_and_swaplp reloads 'expected' with the current word. */
    swapped = comp_and_swaplp((volatile long *)wd, &expected, desired);

    LOCK_TRACE(
           "fetch_and_addlp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
           wd, *wd, expected, desired, swapped);

    if (swapped != 0)
      break;
  }

  return expected;
}
#endif /* __64BIT__ */

/* AND mask into the word at wd using a compare-and-swap retry loop and
   return the value the word held immediately before the successful
   update.

   comp_and_swap refreshes oldVal with the current contents of the word
   whenever the swap fails, so each retry recomputes newVal from the
   latest observed value.

   The trace arguments are cast to match the format string (%p for the
   address, %X for the 32-bit values) so that enabling USE_LOCK_TRACE
   does not pass mismatched varargs. */
static inline int
fetch_and_and(atomic_p wd, uint mask)
{
  int ret, oldVal,newVal;
  oldVal = *wd;

  do
  {
    newVal = oldVal & mask;
    ret = comp_and_swap((volatile int *)wd, &oldVal, newVal);

    LOCK_TRACE(
           "fetch_and_and: wd %p *wd 0x%X old 0x%X new 0x%X ret %d\n",
           (void *)wd, (unsigned int)*wd, (unsigned int)oldVal,
           (unsigned int)newVal, ret);

  } while (ret == 0);

  return oldVal;
}

#ifdef __64BIT__
/* Long variant of fetch_and_and: AND mask into the long word at wd with a
   compare-and-swap retry loop and return the value the word held
   immediately before the successful update. */
static inline long
fetch_and_andlp(atomic_l wd, ulong  mask)
{
  long expected = *wd;
  long desired;
  int  swapped;

  for (;;)
  {
    desired = expected & mask;

    /* On failure comp_and_swaplp reloads 'expected' with the current word. */
    swapped = comp_and_swaplp((volatile long *)wd, &expected, desired);

    LOCK_TRACE(
           "fetch_and_andlp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
           wd, *wd, expected, desired, swapped);

    if (swapped != 0)
      break;
  }

  return expected;
}
#endif /* __64BIT__ */

/* OR mask into the word at wd using a compare-and-swap retry loop and
   return the value the word held immediately before the successful
   update.

   comp_and_swap refreshes oldVal with the current contents of the word
   whenever the swap fails, so each retry recomputes newVal from the
   latest observed value.

   The trace arguments are cast to match the format string (%p for the
   address, %X for the 32-bit values) so that enabling USE_LOCK_TRACE
   does not pass mismatched varargs. */
static inline int
fetch_and_or(atomic_p wd, uint mask)
{
  int ret, oldVal,newVal;
  oldVal = *wd;

  do
  {
    newVal = oldVal | mask;
    ret = comp_and_swap((volatile int *)wd, &oldVal, newVal);

    LOCK_TRACE(
           "fetch_and_or: wd %p *wd 0x%X old 0x%X new 0x%X ret %d\n",
           (void *)wd, (unsigned int)*wd, (unsigned int)oldVal,
           (unsigned int)newVal, ret);

  } while (ret == 0);

  return oldVal;
}

#ifdef __64BIT__
/* Long variant of fetch_and_or: OR mask into the long word at wd with a
   compare-and-swap retry loop and return the value the word held
   immediately before the successful update. */
static inline long
fetch_and_orlp(atomic_l wd, ulong  mask)
{
  long expected = *wd;
  long desired;
  int  swapped;

  for (;;)
  {
    desired = expected | mask;

    /* On failure comp_and_swaplp reloads 'expected' with the current word. */
    swapped = comp_and_swaplp((volatile long *)wd, &expected, desired);

    LOCK_TRACE(
           "fetch_and_orlp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
           wd, *wd, expected, desired, swapped);

    if (swapped != 0)
      break;
  }

  return expected;
}
#endif /* __64BIT__ */

/* Single-shot compare and swap on the word at wd.

   Forwards to comp_and_swap: if the word equals *oldVal it is replaced by
   newVal and the result is nonzero; otherwise *oldVal is overwritten with
   the word's current value and the result is zero.  No retry is made.

   The trace arguments are cast to match the format string (%p for the
   address, %X for the 32-bit values) so that enabling USE_LOCK_TRACE
   does not pass mismatched varargs. */
static inline Boolean
compare_and_swap(atomic_p wd, int *oldVal, int newVal)
{
  Boolean ret;

  ret = comp_and_swap((volatile int *)wd, oldVal, newVal);

  LOCK_TRACE(
         "compare_and_swap out: wd %p *wd 0x%X old 0x%X "
         "new 0x%X ret %d\n", (void *)wd, (unsigned int)*wd,
         (unsigned int)*oldVal, (unsigned int)newVal, (int)ret);
  return ret;
}

#ifdef __64BIT__
/* Long variant of compare_and_swap: one compare-and-swap attempt on the
   long word at wd via comp_and_swaplp.  The result is nonzero when the
   word matched *oldVal and was replaced by newVal; otherwise *oldVal
   receives the word's current value and the result is zero. */
static inline Boolean
compare_and_swaplp(atomic_l wd, long *oldVal, long newVal)
{
  Boolean swapped = comp_and_swaplp((volatile long *)wd, oldVal, newVal);

  LOCK_TRACE(
         "compare_and_swaplp out: wd 0x%lX *wd 0x%lX old 0x%lX "
         "new 0x%lX ret %d\n", wd, *wd, *oldVal, newVal, swapped);

  return swapped;
}
#endif /* __64BIT__ */

/* Try to change the word at wd from oldVal to newVal with a single
   compare and swap.

   Note the inverted sense of the result: returns 0 when the swap was
   performed and 1 when the word did not contain oldVal (in which case
   the word is left unchanged).  The caller's oldVal is passed by value,
   so the value actually found is not reported back.

   The trace arguments are cast to match the format string (%p for the
   address, %X for the 32-bit values) so that enabling USE_LOCK_TRACE
   does not pass mismatched varargs. */
static inline Boolean
_check_lock(atomic_p wd, int oldVal, int newVal)
{
    int old_val_addr = oldVal;   /* local copy; comp_and_swap may overwrite it */
    Boolean  ret;

    ret = comp_and_swap((volatile int *) wd, &old_val_addr, newVal);

    LOCK_TRACE(
         "_check_lock: wd %p *wd 0x%X old 0x%X new 0x%X ret %d\n",
          (void *)wd, (unsigned int)*wd, (unsigned int)old_val_addr,
          (unsigned int)newVal, (int)ret);

    if (ret)
      return 0;
    else
      return 1;
}

#ifdef __64BIT__
/* Long variant of _check_lock: try once to change the long word at wd
   from oldVal to newVal.  The result is inverted relative to the
   underlying compare and swap: 0 means the swap was performed, 1 means
   the word did not contain oldVal. */
static inline Boolean
_check_locklp(atomic_l wd, long oldVal, long newVal)
{
    long expected = oldVal;   /* scratch copy; may be overwritten on failure */
    Boolean swapped;

    swapped = comp_and_swaplp((volatile long *) wd, &expected, newVal);

    LOCK_TRACE(
         "_check_locklp: wd 0x%lX *wd 0x%lX old 0x%lX new 0x%lX ret %d\n",
          wd, *wd, expected, newVal, swapped);

    return swapped ? 0 : 1;
}
#endif /* __64BIT__ */

#endif /* _h_cxiAtomic_plat */
